import numpy
import math
import scipy
import scipy.io
import scipy.misc
import scipy.cluster
import scipy.cluster.vq
import matplotlib
import matplotlib.pyplot
import os
import IPython
import IPython.parallel
import itertools
import random
import sklearn
import sklearn.decomposition
import sklearn.manifold
import sklearn.cluster
import sklearn.feature_extraction
import sklearn.feature_extraction.text
# Directory layout for the drawing-assistant data.  The long path component
# encodes the feature-extraction parameters (apparent-ridges line rendering,
# Gabor filter bank, GALIF patch descriptors) so that different parameter
# settings never collide on disk.
base_path = "/u/mlrobert/code/local/2013_drawing_assistant/data/"

# Input: the precomputed local-feature corpus (a .mat file containing the
# matrix "local_features").
local_feature_corpus_path_name_ext = \
base_path+"local_feature_corpus/bunny2/"+ \
"apparent_ridges.num_latitude_lines=20.num_longitude_lines=20.gabor.num_thetas=08.galif.patch_width=15.num_samples=32.num_tiles=04.num_samples=1000000/"+ \
"local_feature_corpus.mat"

# Output: directory for the k-means cluster centroids (the ".k=5000" suffix
# records the codebook size used below).
local_feature_cluster_centroids_path = \
base_path+"local_feature_cluster_centroids/bunny2/"+ \
"apparent_ridges.num_latitude_lines=20.num_longitude_lines=20.gabor.num_thetas=08.galif.patch_width=15.num_samples=32.num_tiles=04.num_samples=1000000.k=5000/"

# Create the output directory if this is the first run with these parameters.
if not os.path.exists(local_feature_cluster_centroids_path):
    os.makedirs(local_feature_cluster_centroids_path)
local_feature_cluster_centroids_path_name_ext = local_feature_cluster_centroids_path+"local_feature_cluster_centroids.mat"

# Load the corpus; "local_features" is expected to be an (n_samples, n_dims)
# array — the original session reported (1000000, 128).
local_features_mat = scipy.io.loadmat(local_feature_corpus_path_name_ext)
local_features = local_features_mat["local_features"]
# print(...) parses identically under Python 2 and 3 for a single argument.
print(local_features.shape)
# Console output from the original session:
# (1000000, 128)
# Size of the visual vocabulary: one k-means centroid per codebook word.
codebook_size = 5000
k = codebook_size
seed = 0  # fixed random seed so the clustering is reproducible across runs

# Mini-batch k-means scales to the 1M x 128 feature matrix where full-batch
# k-means would be prohibitively slow.  compute_labels=False skips the final
# labelling pass over the corpus — only the centroids are needed here.
# FIX: the `k=` keyword was deprecated in scikit-learn 0.13 and removed in
# 0.14 (the original session's DeprecationWarning said exactly this);
# `n_clusters=` is the supported spelling and keeps the same semantics.
k_means_clustering = sklearn.cluster.MiniBatchKMeans(
    n_clusters=k, random_state=seed, verbose=True, compute_labels=False)
k_means_clustering.fit(local_features)
# Console output from the original session (abridged; kept for reference —
# the raw transcript lines were not valid Python and broke parsing):
#   DeprecationWarning: Parameter k has been replaced by 'n_clusters' ...
#   RuntimeWarning: init_size=300 should be larger than k=5000. Setting it to 3*k
#   Init 1/3..3/3 with method: k-means++ (inertias 0.439552, 0.188259, 0.222177)
#   Minibatch iterations 1..13; converged (lack of improvement in inertia)
#   at iteration 13/1000000.
#   MiniBatchKMeans(batch_size=100, compute_labels=False, init='k-means++',
#       init_size=None, max_iter=100, max_no_improvement=10, n_clusters=5000,
#       n_init=3, random_state=0, reassignment_ratio=0.01, tol=0.0, verbose=True)
# The learned centroids are the codebook: one row per visual word.
# The original session reported shape (5000, 128) == (k, feature_dim).
local_feature_cluster_centroids = k_means_clustering.cluster_centers_
# print(...) parses identically under Python 2 and 3 for a single argument.
print(local_feature_cluster_centroids.shape)
# Console output from the original session:
# (5000, 128)

# Persist the codebook next to the parameters that produced it.
# oned_as="column" silences the MATLAB-orientation ambiguity warning for any
# 1-D arrays by writing them as column vectors.
scipy.io.savemat(local_feature_cluster_centroids_path_name_ext, {"local_feature_cluster_centroids":local_feature_cluster_centroids}, oned_as="column")